# ---------------------------
# R version: 3.6.1
# Code: 10/29/2019
# Title: 'Congressional Oversight Revisited: Politics and Procedure in Agency Rulemaking'
# Authors: Kenneth Lowande and Rachel Potter
# Summary: Replication code for producing all tables and figures. 
# Please report errors to: rapotter@virginia.edu
# ---------------------------

cat("\014")
rm(list=ls())

# install.packages(c('ggplot2','aod','mfx','xtable','ordinal')

require(mfx)
require(ggplot2)
require(aod)
require(xtable)
require(ordinal)

setwd('~/replication files')

# ---------------------------
# DATA
# ---------------------------
load('rules-comments-1.Rda')
load('rules-comments-2.Rda')
load('rules-comments-3.Rda')
load('rules-comments-4.Rda')
load('rules-comments-5.Rda')
load('rules-comments-6.Rda')
load('sim-results.Rda')
load('firms.Rda')
load('firm-comments.Rda')
load('mcs.Rda')
load('mc-comments.Rda')
load('all-comments.Rda')

# ---------------------------
# PRINT ARTICLE
# ---------------------------

# FIGURE 2: Proposal and Status Quo Locations for Select Proposed Rules
ex=c("EPA-HQ-RCRA-2009-0640-0352","EPA-HQ-OW-2011-0880-0001","EPA-HQ-OAR-2011-0135-3008","EPA-HQ-OAR-2002-0051-0035" )
ran=data.frame(cfscore=seq(-2,2,length.out=400))

X=dat.comments[dat.comments$proposal.documentId==ex[1],]
m1=clm(response.sq~cfscore,data=X,link='probit')
m2=clm(proposal.sq~cfscore,data=X,link='probit')
p1=predict(m1,ran,type='prob',se.fit=T)
p2=predict(m2,ran,type='prob',se.fit=T)
ran$prob.q=p1$fit[,'2'] 
ran$prob.x=p2$fit[,'2'] 
ran$se.q=p1$se.fit[,'2'] 
ran$se.x=p2$se.fit[,'2'] 

f2a=ggplot(ran)+geom_line(aes(x=cfscore,y=prob.q))+geom_line(aes(x=cfscore,y=prob.x))+
  labs(x='Conservativism',y='Predicted Probability of Preferred Policy') +
  geom_vline(xintercept=ran$cfscore[ran$prob.q==max(ran$prob.q)],color='#000000',size=2,linetype='dashed') + 
  geom_vline(xintercept=ran$cfscore[ran$prob.x==max(ran$prob.x)],color='#202020',size=2) +
  geom_ribbon(aes(x=cfscore,ymin=prob.q-(1.96*se.q),ymax=prob.q+(1.96*se.q)),alpha=.25) +
  geom_ribbon(aes(x=cfscore,ymin=prob.x-(1.96*se.x),ymax=prob.x+(1.96*se.x)),alpha=.25) +
  theme_bw() + theme(legend.title=element_blank()) + 
  annotate("segment", x = 1.5, xend = 0, y = 0.14, yend = 0.14, colour = "black", size=1, alpha=1, arrow=arrow()) +
  ggtitle('Disposal of Coal Combustion Residuals from Electric Utilities (2014)')

X=dat.comments[dat.comments$proposal.documentId==ex[2],]
m1=clm(response.sq~cfscore,data=X,link='probit')
m2=clm(proposal.sq~cfscore,data=X,link='probit')
p1=predict(m1,ran,type='prob',se.fit=T)
p2=predict(m2,ran,type='prob',se.fit=T)
ran$prob.q=p1$fit[,'2'] 
ran$prob.x=p2$fit[,'2'] 
ran$se.q=p1$se.fit[,'2'] 
ran$se.x=p2$se.fit[,'2'] 

f2b=ggplot(ran)+geom_line(aes(x=cfscore,y=prob.q))+geom_line(aes(x=cfscore,y=prob.x))+
  labs(x='Conservativism',y='Predicted Probability of Preferred Policy') +
  geom_vline(xintercept=ran$cfscore[ran$prob.q==max(ran$prob.q)],color='#000000',size=2,linetype='dashed') + 
  geom_vline(xintercept=ran$cfscore[ran$prob.x==max(ran$prob.x)],color='#202020',size=2) +
  geom_ribbon(aes(x=cfscore,ymin=prob.q-(1.96*se.q),ymax=prob.q+(1.96*se.q)),alpha=.25) +
  geom_ribbon(aes(x=cfscore,ymin=prob.x-(1.96*se.x),ymax=prob.x+(1.96*se.x)),alpha=.25) +
  theme_bw() + theme(legend.title=element_blank()) + 
  annotate("segment", x = 0, xend = -1.4, y = 0, yend = 0, colour = "black", size=1, alpha=1, arrow=arrow()) +
  ggtitle('Waters of the US (2014)')

X=dat.comments[dat.comments$proposal.documentId==ex[3],]
m1=clm(response.sq~cfscore,data=X,link='probit')
m2=clm(proposal.sq~cfscore,data=X,link='probit')
p1=predict(m1,ran,type='prob',se.fit=T)
p2=predict(m2,ran,type='prob',se.fit=T)
ran$prob.q=p1$fit[,'2'] 
ran$prob.x=p2$fit[,'2'] 
ran$se.q=p1$se.fit[,'2'] 
ran$se.x=p2$se.fit[,'2'] 

f2c=ggplot(ran)+geom_line(aes(x=cfscore,y=prob.q))+geom_line(aes(x=cfscore,y=prob.x))+
  labs(x='Conservativism',y='Predicted Probability of Preferred Policy') +
  geom_vline(xintercept=ran$cfscore[ran$prob.q==max(ran$prob.q)],color='#000000',size=2,linetype='dashed') + 
  geom_vline(xintercept=ran$cfscore[ran$prob.x==max(ran$prob.x)],color='#202020',size=2) +
  geom_ribbon(aes(x=cfscore,ymin=prob.q-(1.96*se.q),ymax=prob.q+(1.96*se.q)),alpha=.25) +
  geom_ribbon(aes(x=cfscore,ymin=prob.x-(1.96*se.x),ymax=prob.x+(1.96*se.x)),alpha=.25) +
  theme_bw() + theme(legend.title=element_blank()) + 
  annotate("segment", x = .75, xend = -.75, y = -.09, yend = -.09, colour = "black", size=1, alpha=1, arrow=arrow()) +
  ggtitle('Tier 3 Motor Vehicle Emission and Fuel Standards')

X=dat.comments[dat.comments$proposal.documentId==ex[4],]
m1=clm(response.sq~cfscore,data=X,link='probit')
m2=clm(proposal.sq~cfscore,data=X,link='probit')
p1=predict(m1,ran,type='prob',se.fit=T)
p2=predict(m2,ran,type='prob',se.fit=T)
ran$prob.q=p1$fit[,'2'] 
ran$prob.x=p2$fit[,'2'] 
ran$se.q=p1$se.fit[,'2'] 
ran$se.x=p2$se.fit[,'2'] 

f2d=ggplot(ran)+geom_line(aes(x=cfscore,y=prob.q))+geom_line(aes(x=cfscore,y=prob.x))+
  labs(x='Conservativism',y='Predicted Probability of Preferred Policy') +
  geom_vline(xintercept=ran$cfscore[ran$prob.q==max(ran$prob.q)],color='#000000',size=2,linetype='dashed') + 
  geom_vline(xintercept=ran$cfscore[ran$prob.x==max(ran$prob.x)],color='#202020',size=2) +
  geom_ribbon(aes(x=cfscore,ymin=prob.q-(1.96*se.q),ymax=prob.q+(1.96*se.q)),alpha=.25) +
  geom_ribbon(aes(x=cfscore,ymin=prob.x-(1.96*se.x),ymax=prob.x+(1.96*se.x)),alpha=.25) +
  theme_bw() + theme(legend.title=element_blank()) + 
  annotate("segment", x = 0.1, xend = -1.4, y = -0.2, yend = -0.2, colour = "black", size=1, alpha=1, arrow=arrow()) +
  ggtitle('Mandator Reporting of Greenhouse Gases (2009)')
# ---------------------------

# FIGURE 3: Marginal Increase in Probability of Commenting on EPA Rules, by Ideological Quintile
m1=logitmfx(substantive~congress+rule+comm+chair+ranking+senate+firm+distance.ord,data=dt1,robust=T,clustervar1='id') # m1 -- substantive, full sample
m2=logitmfx(procedural~congress+rule+comm+chair+ranking+senate+firm+distance.ord,data=dt2,robust=T,clustervar1='id') # m2 -- procedural, full sample
m3=logitmfx(substantive~congress+rule+comm+chair+ranking+senate+distance.ord,data=dt3,robust=T,clustervar1='id') # m3 -- substantive, mcs
m4=logitmfx(procedural~congress+rule+comm+chair+ranking+senate+distance.ord,data=dt4,robust=T,clustervar1='id') # m4 -- procedural, mcs
m5=logitmfx(substantive~congress+rule+distance.ord,data=dt5,robust=T,clustervar1='id') # m5 -- substantive, firms
m6=logitmfx(procedural~congress+rule+distance.ord,data=dt6,robust=T,clustervar1='id') # m6 -- procedural, firms

est=data.frame(coef=numeric(16),se=numeric(16),model=numeric(16),Commenter=numeric(16),var=numeric(16))
est[1,1:5]=c(m3$mfxest[27,1],m3$mfxest[27,2],'Substance','Congress','Nearly Aligned')
est[2,1:5]=c(m3$mfxest[28,1],m3$mfxest[28,2],'Substance','Congress','Slight Disagreement')
est[3,1:5]=c(m3$mfxest[29,1],m3$mfxest[29,2],'Substance','Congress','Moderate Disagreement')
est[4,1:5]=c(m3$mfxest[30,1],m3$mfxest[30,2],'Substance','Congress','High Disagreement')
est[5,1:5]=c(m4$mfxest[15,1],m4$mfxest[15,2],'Procedure','Congress','Nearly Aligned')
est[6,1:5]=c(m4$mfxest[16,1],m4$mfxest[16,2],'Procedure','Congress','Slight Disagreement')
est[7,1:5]=c(m4$mfxest[17,1],m4$mfxest[17,2],'Procedure','Congress','Moderate Disagreement')
est[8,1:5]=c(m4$mfxest[18,1],m4$mfxest[18,2],'Procedure','Congress','High Disagreement')
est[9,1:5]=c(m5$mfxest[31,1],m5$mfxest[31,2],'Substance','Industry','Nearly Aligned')
est[10,1:5]=c(m5$mfxest[32,1],m5$mfxest[32,2],'Substance','Industry','Slight Disagreement')
est[11,1:5]=c(m5$mfxest[33,1],m5$mfxest[33,2],'Substance','Industry','Moderate Disagreement')
est[12,1:5]=c(m5$mfxest[34,1],m5$mfxest[34,2],'Substance','Industry','High Disagreement')
est[13,1:5]=c(m6$mfxest[11,1],m6$mfxest[11,2],'Procedure','Industry','Nearly Aligned')
est[14,1:5]=c(m6$mfxest[12,1],m6$mfxest[12,2],'Procedure','Industry','Slight Disagreement')
est[15,1:5]=c(m6$mfxest[13,1],m6$mfxest[13,2],'Procedure','Industry','Moderate Disagreement')
est[16,1:5]=c(m6$mfxest[14,1],m6$mfxest[14,2],'Procedure','Industry','High Disagreement')
est$coef=as.numeric(est$coef)
est$se=as.numeric(est$se)
est$model=as.factor(est$model)
est$var=as.factor(est$var)
est$Commenter=as.factor(est$Commenter)
interval1 <- -qnorm((1-0.9)/2) 
interval2 <- -qnorm((1-0.95)/2)
est$var=factor(est$var,levels(est$var)[c(1,2,4,3)])

f3a=ggplot(est[est$model=='Procedure',], aes(colour = Commenter)) + scale_colour_manual(values = c("#000000", "#A8A8A8")) +
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) + 
  geom_linerange(aes(x = var, ymin = coef - se*interval1, ymax = coef + se*interval1),lwd = 1, 
                 position = position_dodge(width = 1/2)) +
  geom_pointrange(aes(x = var, y = coef, ymin = coef - se*interval2,ymax = coef + se*interval2),
                  lwd = 1/2, position = position_dodge(width = 1/2),shape = 21, fill = "WHITE") +
  coord_flip() + theme_bw() + xlab('') + ylab('Marginal Increase in Probability of Comment')
f3b=ggplot(est[est$model=='Substance',], aes(colour = Commenter)) + scale_colour_manual(values = c("#000000", "#A8A8A8")) +
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) + 
  geom_linerange(aes(x = var, ymin = coef - se*interval1, ymax = coef + se*interval1),lwd = 1, 
                 position = position_dodge(width = 1/2)) +
  geom_pointrange(aes(x = var, y = coef, ymin = coef - se*interval2,ymax = coef + se*interval2),
                  lwd = 1/2, position = position_dodge(width = 1/2),shape = 21, fill = "WHITE") +
  coord_flip() + theme_bw() + xlab('') + ylab('Marginal Increase in Probability of Comment')
# ---------------------------

# ---------------------------
# SUPPLEMENTARY INFORMATION
# ---------------------------

# TABLE SI-1: Frequent Commenters on EPA Rules
x1=as.data.frame(round(rev(sort(table(firm.comments$name)))[1:10]/nrow(firm.comments),digits=3))
x2=as.data.frame(rev(sort(table(firm.comments$name)))[1:10])
x=merge(x1,x2,by='Var1')
x$Var1=as.character(x$Var1)
x=x[order(-x$Freq.x),]
y1=as.data.frame(round(rev(sort(table(mc.comments$name)))[1:10]/nrow(mc.comments),digits=3))
y2=as.data.frame(rev(sort(table(mc.comments$name)))[1:10])
y=merge(y1,y2,by='Var1')
y$Var1=as.character(y$Var1)
y=y[order(-y$Freq.x),]
p=rbind(x,y)
rownames(p) <- NULL
xtable(p[,1:3])
# ---------------------------

# Figure SI-1: Ideological Distribution of Congressional and Firm Commenters
fsi1a=ggplot() +  geom_density(data=firms, alpha=1, aes(x=cfscore,fill=1)) + geom_density(data=firm.comments, alpha=.7, aes(x=cfscore,fill=2)) +
  scale_fill_gradient(low="#00274c",high="#e9f4ff") + theme_bw() +
  theme(strip.background = element_blank(),legend.position="none") + 
  xlab('CF Score') + ylab('Density') +
  annotate("text", x=-.6, y=0.5, label= "Fortune 500") +
  annotate("text", x=.85, y=1.15, label= "Comments")
fsi1b=ggplot() +  geom_density(data=mcs, alpha=1, aes(x=cfscore,fill=1)) + geom_density(data=mc.comments, alpha=.7, aes(x=cfscore,fill=2)) +
  scale_fill_gradient(low="#00274c",high="#e9f4ff") + theme_bw() +
  theme(strip.background = element_blank(),legend.position="none") + 
  xlab('CF Score') + ylab('Density') +
  annotate("text", x=.1, y=0.3, label= "Congress") +
  annotate("text", x=1.45, y=0.7, label= "Comments")
# ---------------------------

# TABLE SI-2: Diagnostics for Estimated Proposal and Status Quo Positions
# this information can be found in 'est-diagnostics.Rda'
# ---------------------------

# Figure SI-2: Bias Patterns in Simulated Commenting Behavior; for simulation code see 'bias-sim.R'
sims=rbind(simulations,A)
simfig1a=ggplot(sims, aes(y=Bias,x=as.factor(round(Beta_NR,2)))) + geom_boxplot() + theme_bw() + xlab('Beta (Non-Reponse)') + ylab('Bias') # Non-Reponse Beta and Bias
simfig1b=ggplot(sims, aes(y=Bias,x=as.factor(round(Beta,2)))) + geom_boxplot() + theme_bw() + xlab('Beta') + ylab('Bias') # Beta and Bias
simfig1c=ggplot(sims, aes(y=Bias,x=as.factor(round(Window,2)))) + geom_boxplot() + theme_bw() + xlab('Window') + ylab('Bias') # Window and Bias
simfig1d=ggplot(sims, aes(y=Bias,x=as.factor(Commenters))) + geom_boxplot() + theme_bw() + xlab('Commenters') + ylab('Bias') # Beta and Bias
# ---------------------------

# Table SI-5: Congress and Industry Participation in EPA Rulemaking, 2007-2017
# see code for FIGURE 3, and model objects m1-6
# ---------------------------

# Table SI-6: Congress and Industry Participation in EPA Rulemaking, 2007-2017 (LPM)
lpm1=lm(substantive~congress+rule+comm+chair+ranking+senate+firm+distance.ord,data=dt1) # m1 -- substantive, full sample
lpm2=lm(procedural~congress+rule+comm+chair+ranking+senate+firm+distance.ord,data=dt2) # m2 -- procedural, full sample
lpm3=lm(substantive~congress+rule+comm+chair+ranking+senate+distance.ord,data=dt3) # m3 -- substantive, mcs
lpm4=lm(procedural~congress+rule+comm+chair+ranking+senate+distance.ord,data=dt4) # m4 -- procedural, mcs
lpm5=lm(substantive~congress+rule+distance.ord,data=dt5) # m5 -- substantive, firms
lpm6=lm(procedural~congress+rule+distance.ord,data=dt6) # m6 -- procedural, firms
# ---------------------------

# Table SI-7: Congress and Industry Participation in EPA Rulemaking, 2007-2017 (Continuous)
cm1=logitmfx(substantive~congress+rule+comm+chair+ranking+senate+firm+dist.cont,data=dt1,robust=T,clustervar1='id') # m1 -- substantive, full sample
cm2=logitmfx(procedural~congress+rule+comm+chair+ranking+senate+firm+dist.cont,data=dt2,robust=T,clustervar1='id') # m2 -- procedural, full sample
cm3=logitmfx(substantive~congress+rule+comm+chair+ranking+senate+dist.cont,data=dt3,robust=T,clustervar1='id') # m3 -- substantive, mcs
cm4=logitmfx(procedural~congress+rule+comm+chair+ranking+senate+dist.cont,data=dt4,robust=T,clustervar1='id') # m4 -- procedural, mcs
cm5=logitmfx(substantive~congress+rule+dist.cont,data=dt5,robust=T,clustervar1='id') # m5 -- substantive, firms
cm6=logitmfx(procedural~congress+rule+dist.cont,data=dt6,robust=T,clustervar1='id') # m6 -- procedural, firms
# ---------------------------

# Table SI-8: Robustness Checks Incorporating Preference and Proposal Uncertainty, 2007-2017
# ---------------------------

# Table SI-9: Divided Government and Participation in EPA Rulemaking, 2007-2017
d1=logitmfx(substantive~dg+congress+rule+comm+chair+ranking+senate+firm+distance.ord,data=dt1,robust=T,clustervar1='id') # d1 -- substantive, full sample
d2=logitmfx(procedural~dg+congress+rule+comm+chair+ranking+senate+firm+distance.ord,data=dt2,robust=T,clustervar1='id') # d2 -- procedural, full sample
d3=logitmfx(substantive~dg+congress+rule+comm+chair+ranking+senate+distance.ord,data=dt3,robust=T,clustervar1='id') # d3 -- substantive, mcs
d4=logitmfx(procedural~dg+congress+rule+comm+chair+ranking+senate+distance.ord,data=dt4,robust=T,clustervar1='id') # d4 -- procedural, mcs
d5=logitmfx(substantive~dg+congress+rule+distance.ord,data=dt5,robust=T,clustervar1='id') # d5 -- substantive, firms
d6=logitmfx(procedural~dg +congress +rule+distance.ord,data=dt6,robust=T,clustervar1='id') # d6 -- procedural, firms
# ---------------------------

# Table SI-10: Environmental Lobbying and Firm Participation in EPA Rulemaking, 2007-2017
lm5=logitmfx(substantive~congress+rule+distance.ord+ENVlobbying,data=dt5,robust=T,clustervar1='id') #lm5 -- substantive, firms
lm6=logitmfx(procedural~congress+rule+distance.ord+ENVlobbying,data=dt6,robust=T,clustervar1='id') #lm6 -- procedural, firms
# ---------------------------
